In [1]:
import os
import torch
import librosa
from IPython.display import Audio
import numpy as np
from IPython.display import HTML
from base64 import b64encode
import matplotlib.pyplot as plt
In [2]:
import numpy

# Put the `float` / `int` attribute names back on the numpy module.
# NOTE(review): presumably needed because a dependency still references aliases
# that newer NumPy releases no longer provide — confirm which one.
numpy.float, numpy.int = numpy.float64, numpy.int_
In [ ]:
In [ ]:
In [ ]:
In [ ]:
In [3]:
# Dataset root and the sub-folder names used by the cells below.
# NOTE(review): absolute, machine-specific path — edit `home` when running elsewhere.
home = '/home/aiv-gpu-019/test'
sub1 = 'mouth'  # video folder that is listed just below
sub2 = 'wav'    # audio folder
# os.listdir() returns entries in arbitrary order; sorting makes `fn[0]` (the
# sample every later cell works on) the same file on every run.
fn = sorted(os.listdir(os.path.join(home, sub1)))
print(fn[0])
lip_K_5_M_04_C955_A_012_9.mp4
In [ ]:
In [ ]:
In [4]:
def play_video(video_path, width=200):
    """Embed a local MP4 file in the notebook as an inline HTML5 player.

    The whole file is read and inlined as a base64 ``data:`` URL inside a
    ``<video>`` element.

    Args:
        video_path: Path of the MP4 file to read.
        width: Value placed in the ``width`` attribute of the ``<video>`` tag.

    Returns:
        An ``HTML`` display object wrapping the ``<video>`` element.

    Raises:
        OSError: If ``video_path`` cannot be opened or read.
    """
    # The context manager closes the handle as soon as the bytes are read; the
    # previous one-liner left the open file to the garbage collector.
    with open(video_path, 'rb') as video_file:
        mp4 = video_file.read()
    data_url = "data:video/mp4;base64," + b64encode(mp4).decode()
    return HTML(f"""
<video width={width} controls>
<source src="{data_url}" type="video/mp4">
</video>
""")
In [5]:
# Play the sample clip found directly under `home`.
# NOTE(review): `fn` was listed from `home/sub1`, but this path omits `sub1` —
# presumably a same-named source clip lives at the dataset root; confirm.
play_video(video_path=os.path.join(home, fn[0]), width=300)
Out[5]:
In [6]:
# Load the audio track that shares its stem with the sample clip.
# splitext() removes only the final extension, so a stem containing dots stays
# intact (split('.')[0] would cut it at the first dot); `sub2` replaces the
# repeated 'wav' literal. No `sr` is passed, so librosa's default sample-rate
# handling applies.
y, sr = librosa.load(os.path.join(home, sub2, os.path.splitext(fn[0])[0] + '.wav'))
In [7]:
# Render an inline audio player for the waveform loaded in the previous cell.
Audio(data=y, rate=sr)
Out[7]:
In [8]:
# Target path for the re-encoded copy of the sample clip: same folder as the
# listed clips (`home/sub1`), file name prefixed with "mouth_".
mouth_roi_path = os.path.join(home, sub1, f"mouth_{fn[0]}")
In [9]:
# Re-encode the sample clip with libx264 (CRF 28) and write it to `mouth_roi_path`.
# The log kept below shows the input stream is MPEG-4 Simple Profile and the
# output is H.264.
# NOTE(review): the command is left commented out, presumably so a re-run does
# not try to overwrite the existing output — confirm; uncomment to regenerate.
#!ffmpeg -i {os.path.join(home, sub1,fn[0])} -c:v libx264 -crf 28 {mouth_roi_path}
ffmpeg version 4.4.2-0ubuntu0.22.04.1 Copyright (c) 2000-2021 the FFmpeg developers
built with gcc 11 (Ubuntu 11.2.0-19ubuntu1)
configuration: --prefix=/usr --extra-version=0ubuntu0.22.04.1 --toolchain=hardened --libdir=/usr/lib/x86_64-linux-gnu --incdir=/usr/include/x86_64-linux-gnu --arch=amd64 --enable-gpl --disable-stripping --enable-gnutls --enable-ladspa --enable-libaom --enable-libass --enable-libbluray --enable-libbs2b --enable-libcaca --enable-libcdio --enable-libcodec2 --enable-libdav1d --enable-libflite --enable-libfontconfig --enable-libfreetype --enable-libfribidi --enable-libgme --enable-libgsm --enable-libjack --enable-libmp3lame --enable-libmysofa --enable-libopenjpeg --enable-libopenmpt --enable-libopus --enable-libpulse --enable-librabbitmq --enable-librubberband --enable-libshine --enable-libsnappy --enable-libsoxr --enable-libspeex --enable-libsrt --enable-libssh --enable-libtheora --enable-libtwolame --enable-libvidstab --enable-libvorbis --enable-libvpx --enable-libwebp --enable-libx265 --enable-libxml2 --enable-libxvid --enable-libzimg --enable-libzmq --enable-libzvbi --enable-lv2 --enable-omx --enable-openal --enable-opencl --enable-opengl --enable-sdl2 --enable-pocketsphinx --enable-librsvg --enable-libmfx --enable-libdc1394 --enable-libdrm --enable-libiec61883 --enable-chromaprint --enable-frei0r --enable-libx264 --enable-shared
libavutil 56. 70.100 / 56. 70.100
libavcodec 58.134.100 / 58.134.100
libavformat 58. 76.100 / 58. 76.100
libavdevice 58. 13.100 / 58. 13.100
libavfilter 7.110.100 / 7.110.100
libswscale 5. 9.100 / 5. 9.100
libswresample 3. 9.100 / 3. 9.100
libpostproc 55. 9.100 / 55. 9.100
Input #0, mov,mp4,m4a,3gp,3g2,mj2, from '/home/aiv-gpu-019/test/mouth/lip_K_5_M_04_C955_A_012_9.mp4':
Metadata:
major_brand : isom
minor_version : 512
compatible_brands: isomiso2mp41
encoder : Lavf59.27.100
Duration: 00:00:06.32, start: 0.000000, bitrate: 108 kb/s
Stream #0:0(und): Video: mpeg4 (Simple Profile) (mp4v / 0x7634706D), yuv420p, 96x96 [SAR 1:1 DAR 1:1], 106 kb/s, 25 fps, 25 tbr, 12800 tbn, 25 tbc (default)
Metadata:
handler_name : VideoHandler
vendor_id : [0][0][0][0]
Stream mapping:
Stream #0:0 -> #0:0 (mpeg4 (native) -> h264 (libx264))
Press [q] to stop, [?] for help
[libx264 @ 0x562e4819c040] using SAR=1/1
[libx264 @ 0x562e4819c040] using cpu capabilities: MMX2 SSE2Fast SSSE3 SSE4.2 AVX FMA3 BMI2 AVX2 AVX512
[libx264 @ 0x562e4819c040] profile High, level 1.0, 4:2:0, 8-bit
[libx264 @ 0x562e4819c040] 264 - core 163 r3060 5db6aa6 - H.264/MPEG-4 AVC codec - Copyleft 2003-2021 - http://www.videolan.org/x264.html - options: cabac=1 ref=3 deblock=1:0:0 analyse=0x3:0x113 me=hex subme=7 psy=1 psy_rd=1.00:0.00 mixed_ref=1 me_range=16 chroma_me=1 trellis=1 8x8dct=1 cqm=0 deadzone=21,11 fast_pskip=1 chroma_qp_offset=-2 threads=3 lookahead_threads=1 sliced_threads=0 nr=0 decimate=1 interlaced=0 bluray_compat=0 constrained_intra=0 bframes=3 b_pyramid=2 b_adapt=1 b_bias=0 direct=1 weightb=1 open_gop=0 weightp=2 keyint=250 keyint_min=25 scenecut=40 intra_refresh=0 rc_lookahead=40 rc=crf mbtree=1 crf=28.0 qcomp=0.60 qpmin=0 qpmax=69 qpstep=4 ip_ratio=1.40 aq=1:1.00
Output #0, mp4, to '/home/aiv-gpu-019/test/mouth/mouth_lip_K_5_M_04_C955_A_012_9.mp4':
Metadata:
major_brand : isom
minor_version : 512
compatible_brands: isomiso2mp41
encoder : Lavf58.76.100
Stream #0:0(und): Video: h264 (avc1 / 0x31637661), yuv420p(progressive), 96x96 [SAR 1:1 DAR 1:1], q=2-31, 25 fps, 12800 tbn (default)
Metadata:
handler_name : VideoHandler
vendor_id : [0][0][0][0]
encoder : Lavc58.134.100 libx264
Side data:
cpb: bitrate max/min/avg: 0/0/0 buffer size: 0 vbv_delay: N/A
frame= 158 fps=0.0 q=-1.0 Lsize= 17kB time=00:00:06.20 bitrate= 22.8kbits/s speed=7.31x
video:16kB audio:0kB subtitle:0kB other streams:0kB global headers:0kB muxing overhead: 11.298546%
[libx264 @ 0x562e4819c040] frame I:1 Avg QP:29.72 size: 653
[libx264 @ 0x562e4819c040] frame P:140 Avg QP:29.50 size: 100
[libx264 @ 0x562e4819c040] frame B:17 Avg QP:36.04 size: 31
[libx264 @ 0x562e4819c040] consecutive B-frames: 83.5% 6.3% 0.0% 10.1%
[libx264 @ 0x562e4819c040] mb I I16..4: 0.0% 66.7% 33.3%
[libx264 @ 0x562e4819c040] mb P I16..4: 0.1% 0.3% 0.0% P16..4: 42.3% 10.2% 3.7% 0.0% 0.0% skip:43.4%
[libx264 @ 0x562e4819c040] mb B I16..4: 0.0% 0.2% 0.0% B16..8: 42.8% 0.8% 0.2% direct: 0.2% skip:55.9% L0:40.7% L1:57.8% BI: 1.5%
[libx264 @ 0x562e4819c040] 8x8 transform intra:73.7% inter:65.2%
[libx264 @ 0x562e4819c040] coded y,uvDC,uvAC intra: 79.4% 87.7% 28.1% inter: 11.5% 10.9% 0.0%
[libx264 @ 0x562e4819c040] i16 v,h,dc,p: 67% 0% 33% 0%
[libx264 @ 0x562e4819c040] i8 v,h,dc,ddl,ddr,vr,hd,vl,hu: 16% 15% 42% 2% 7% 1% 5% 4% 8%
[libx264 @ 0x562e4819c040] i4 v,h,dc,ddl,ddr,vr,hd,vl,hu: 14% 22% 25% 6% 6% 3% 18% 4% 4%
[libx264 @ 0x562e4819c040] i8c dc,h,v,p: 44% 26% 26% 4%
[libx264 @ 0x562e4819c040] Weighted P-Frames: Y:6.4% UV:1.4%
[libx264 @ 0x562e4819c040] ref P L0: 67.5% 25.1% 5.2% 2.1% 0.1%
[libx264 @ 0x562e4819c040] ref B L0: 80.9% 17.8% 1.3%
[libx264 @ 0x562e4819c040] ref B L1: 96.9% 3.1%
[libx264 @ 0x562e4819c040] kb/s:19.24
In [10]:
# Play the re-encoded clip stored at `mouth_roi_path`.
play_video(video_path=mouth_roi_path, width=300)
Out[10]:
In [ ]:
# Generate the CSV file
# NOTE(review): the dataset root and the mouth/wav folder names passed here repeat
# the values of `home`, `sub1` and `sub2` defined earlier; keep them in sync.
# NOTE(review): this cell has no execution count, and `TSV_generate.py` is resolved
# relative to the current working directory — confirm where it is meant to run from.
!python3 TSV_generate.py --dataset /home/aiv-gpu-019/test --mouth_fd mouth --label_fd label --wav_fd wav --spm_path /DATA/temp/auto_avsr/spm/unigram/bpe_1207
In [11]:
# Print the manifest that the evaluation step below receives as its test file.
# NOTE(review): the columns look like root dir, file name, frame count, label —
# confirm against TSV_generate.py.
%cat /home/aiv-gpu-019/test/demo_test.csv
/home/aiv-gpu-019/test,lip_K_5_M_04_C955_A_012_9.mp4,158,0
In [12]:
# Switch to the project directory: the evaluation cell below refers to `eval.py`
# and the checkpoint by relative path.
%cd /DATA/temp/auto_avsr
/DATA/temp/auto_avsr
/home/aiv-gpu-019/.local/lib/python3.10/site-packages/IPython/core/magics/osm.py:417: UserWarning: This is now an optional IPython functionality, setting dhist requires you to install the `pickleshare` library. self.shell.db['dhist'] = compress_dhist(dhist)[-100:]
In [24]:
# Run eval.py on the demo manifest with the video modality and the checkpoint at
# train1207/cleaned.ckpt (relative to the current directory).
# NOTE(review): the meaning of label_flag=0 is not visible here — presumably
# "no reference transcript available"; confirm in eval.py.
!python3 eval.py data.modality=video data.dataset.root_dir=/DATA/temp/auto_avsr data.dataset.test_file=/home/aiv-gpu-019/test/demo_test.csv label_flag=0 mouth_dir=mouth wav_dir=wav pretrained_model_path=train1207/cleaned.ckpt
2024-11-08 18:21:46.710080: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations: AVX2 AVX512F AVX512_VNNI FMA
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.
2024-11-08 18:21:46.994319: I tensorflow/core/util/util.cc:169] oneDNN custom operations are on. You may see slightly different numerical results due to floating-point round-off errors from different computation orders. To turn them off, set the environment variable `TF_ENABLE_ONEDNN_OPTS=0`.
2024-11-08 18:21:47.043120: E tensorflow/stream_executor/cuda/cuda_blas.cc:2981] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2024-11-08 18:21:48.022246: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer.so.7'; dlerror: libnvinfer.so.7: cannot open shared object file: No such file or directory
2024-11-08 18:21:48.022387: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libnvinfer_plugin.so.7'; dlerror: libnvinfer_plugin.so.7: cannot open shared object file: No such file or directory
2024-11-08 18:21:48.022404: W tensorflow/compiler/tf2tensorrt/utils/py_utils.cc:38] TF-TRT Warning: Cannot dlopen some TensorRT libraries. If you would like to use Nvidia GPU with TensorRT, please make sure the missing libraries mentioned above are installed properly.
/home/aiv-gpu-019/.local/lib/python3.10/site-packages/pytorch_lightning/core/lightning.py:2054: DeprecationWarning: `torch.distributed._sharded_tensor` will be deprecated, use `torch.distributed._shard.sharded_tensor` instead
from torch.distributed._sharded_tensor import pre_load_state_dict_hook, state_dict_hook
/DATA/temp/auto_avsr/lightning.py:64: FutureWarning: You are using `torch.load` with `weights_only=False` (the current default value), which uses the default pickle module implicitly. It is possible to construct malicious pickle data which will execute arbitrary code during unpickling (See https://github.com/pytorch/pytorch/blob/main/SECURITY.md#untrusted-models for more details). In a future release, the default value for `weights_only` will be flipped to `True`. This limits the functions that could be executed during unpickling. Arbitrary objects will no longer be allowed to be loaded via this mode unless they are explicitly allowlisted by the user via `torch.serialization.add_safe_globals`. We recommend you start setting `weights_only=True` for any use case where you don't have full control of the loaded file. Please open an issue on GitHub for any issues related to this experimental feature.
ckpt = torch.load(self.cfg.pretrained_model_path, map_location=lambda storage, loc: storage)
/DATA/temp/auto_avsr/eval.py:23: FutureWarning: You are using `torch.load` with `weights_only=False` (the current default value), which uses the default pickle module implicitly. It is possible to construct malicious pickle data which will execute arbitrary code during unpickling (See https://github.com/pytorch/pytorch/blob/main/SECURITY.md#untrusted-models for more details). In a future release, the default value for `weights_only` will be flipped to `True`. This limits the functions that could be executed during unpickling. Arbitrary objects will no longer be allowed to be loaded via this mode unless they are explicitly allowlisted by the user via `torch.serialization.add_safe_globals`. We recommend you start setting `weights_only=True` for any use case where you don't have full control of the loaded file. Please open an issue on GitHub for any issues related to this experimental feature.
modelmodule.model.load_state_dict(torch.load(cfg.pretrained_model_path, map_location=lambda storage, loc: storage))
/home/aiv-gpu-019/.local/lib/python3.10/site-packages/pytorch_lightning/trainer/data_loading.py:132: UserWarning: The dataloader, test_dataloader 0, does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` (try 16 which is the number of cpus on this machine) in the `DataLoader` init to improve performance.
rank_zero_warn(
Testing: 0it [00:00, ?it/s]
predicted:올림픽에서 우리나라가 출전하지 않은 종목에는 관심이 없어요
Testing: 100%|████████████████████████████████████| 1/1 [00:06<00:00, 6.64s/it]/home/aiv-gpu-019/.local/lib/python3.10/site-packages/pytorch_lightning/trainer/connectors/logger_connector/result.py:227: UserWarning: You called `self.log('sentence_count', ...)` in your `on_test_epoch_end` but the value needs to be floating point. Converting it to torch.float32.
warning_cache.warn(
--------------------------------------------------------------------------------
DATALOADER:0 TEST RESULTS
{'sentence_count': 1.0}
--------------------------------------------------------------------------------
Testing: 100%|████████████████████████████████████| 1/1 [00:06<00:00, 6.66s/it]
In [22]:
# Print the saved prediction text (presumably written by eval.py — confirm).
# NOTE(review): this cell's execution count (22) is lower than the eval cell's
# (24), so the output shown predates the latest eval run; re-run it after eval.
%cat /DATA/temp/auto_avsr/result.txt
predicted:올림픽에서 우리나라가 출전하지 않은 종목에는 관심이 없어요
In [ ]:
In [ ]:
In [ ]:
In [ ]:
In [ ]:
In [ ]:
In [ ]:
In [ ]:
In [ ]:
In [ ]:
In [ ]:
In [ ]:
In [ ]: